import sys
sys.path.append('../../')
import BasicLibraries   as BL 
import Make_Population
import global_variables as GV
import os
import regressions
import aux_estimation as ae
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
warnings.simplefilter('ignore', ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Shared settings unpacked from global_variables. None of these six names is
# referenced again in the visible part of this script.
marker_type, line_style, index_dict, abb_index_dict, inverse_dict, STcontrols = GV.global_vars()
# Not referenced in the visible part of this script.
MARGINAL=False
# Suffix inserted into the names of the pickle files loaded in STEP 3.
ANNOTATOR='Triple'

############################
# `data` supplies the dataset loaders used in STEP 1.
data = Make_Population.MakeUniverse()
# Not referenced in the visible part of this script.
empiricsBS = regressions.Analysis()
# `ROB` provides the plotting methods used in STEP 3.
ROB = regressions.RobustnessTest()
############################

'''
This is the code to reproduce the main results of the paper: Figure 3 and Figure 4 in the main text

The code is organised as follows:
    STEP 1. Load the datasets
    STEP 2. Estimate the regression coefficients
    STEP 3. Plot them
    
Note that running this file requires access to several datasets as described in the main text and the README file.

'''


#=========== STEP 1 ============
# Load every dataset that is passed to the estimation stages in STEP 2.

#%% This step requires access to COMPUSTAT, Refinitiv and Trucost
compustat, regions, sector, tc = data.make_financials()
# Keep a copy of the frame as returned by make_financials(): `compustat` is
# re-bound at the end of this step, while the sector split below uses this copy.
compustat_clean = compustat.copy()

#%% This is our derived measure of O-ST (the dataset is available in the folder)
system_thinking = BL.pd.read_csv('local_data/OST.csv')
id_mapping = BL.pd.read_csv('local_data/ISIN_gvkey_mapping.csv')[['ISIN', 'gvkey']]
# No join key is given; assuming BL.pd is pandas, the merge uses the columns the
# two frames have in common (presumably 'ISIN' -- TODO confirm).
system_thinking = system_thinking.merge(id_mapping)


#%% This step requires access to CDP data
# NOTE(review): the official GICS sector names are 'Industrials' and 'Materials';
# confirm that the labels below match the values stored in GICS_level_1.
sector_firms = compustat_clean[compustat_clean.GICS_level_1.isin(['Energy', 'Industrial', 'Material', 'Utilities'])]
#=== This file contains a mapping from the ISIN in the CDP dataset to the gvkey identifier in COMPUSTAT
# Open the pickle through a context manager so the file handle is always closed.
with open('local_data/CDP_GVKEY_map.pckl', 'rb') as cdp_file:
    cdp_list = BL.pickle.load(cdp_file)
# Compute the membership mask once and split the sector firms with it.
in_cdp = sector_firms.gvkey.isin(cdp_list.gvkey)
cdp_firms = sector_firms[in_cdp]
non_cdp_firms = sector_firms[~in_cdp]

#%% This step requires access to the Paris Alignment dataset in TruCost
MAX_ProjectionYear, paris2018,paris2019,paris2020, base_dummies = data.get_paris_for_analysis('below')

#%% This step requires access to data from Asset4
ref, vars_, cdp_t, ref_t = data.make_other_esg(system_thinking, version_ = '_v2')

#%% This step requires access to fundamentals data from Refinitiv
mtb = BL.pd.read_csv('local_data/refinitiv_mtb.csv')
# No join key is given here either; the merge relies on the columns shared by
# `compustat` and the two selected columns (presumably 'mrg' -- TODO confirm).
compustat = compustat.merge(mtb[['MTB', 'mrg']])


#%% Here we run the three-stage estimation approach described in the paper. 
#== Statistical significance is assessed with bootstrapping,
#== so expect this section to take a while to run

#=========== STEP 2 ============

# Argument runs that appear, in this order, in all four estimation calls.
paris_args = (paris2018, paris2019, paris2020, base_dummies, MAX_ProjectionYear)
sample_args = (cdp_t, ref_t, cdp_firms, non_cdp_firms)

# The result of STAGE1 is passed to STAGE2 and STAGE3.
robust_explanators = ae.STAGE1(
    compustat, regions, sector, tc, system_thinking,
    *paris_args,
    ref, vars_,
    *sample_args
)

# The result of STAGE2 is passed to STAGE3.
robust_mediators = ae.STAGE2(
    compustat, regions, sector, tc, system_thinking,
    *paris_args,
    ref, robust_explanators, vars_,
    *sample_args
)

# STAGE3 receives both earlier results; its return value is not kept.
ae.STAGE3(
    compustat, regions, sector, tc, system_thinking,
    *paris_args,
    ref, robust_explanators, robust_mediators,
    *sample_args
)

# Note the different order of the leading arguments compared with the stages above.
ae.RollingEstimation(
    system_thinking, compustat, tc, regions, sector,
    *paris_args,
    ref, vars_,
    *sample_args
)


# Stop here when the file is run as a script: nothing below this call executes
# in that case.
sys.exit()

#%% Now load the data from the previous steps and make Figure 3 and Figure 4 of the main text
# NOTE: when the file is run top-to-bottom, the sys.exit() call above stops
# execution before this section is reached.


#=========== STEP 3 ============

def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return BL.pickle.load(handle)


# The pickle file names embed the ANNOTATOR suffix defined at the top of the file.
relevance_effect_expl, relevant_explanators_rob = _load_pickle('Relevant_explanators_rob'+ANNOTATOR+'.pckl')
relevance_effect_med, relevant_mediators_rob = _load_pickle('Relevant_mediators_rob'+ANNOTATOR+'.pckl')
outcome_effects_2 = _load_pickle('Robustness_to_sample_size'+ANNOTATOR+'_below.pckl')
outcome_effects_15 = _load_pickle('Robustness_to_sample_size'+ANNOTATOR+'_well_below.pckl')

# Stack the '_below' results with the two climate-target rows of the '_well_below' results.
outcome_effects = BL.pd.concat((outcome_effects_2, outcome_effects_15.loc[['Climate targets', 'Climate targets (wp)']]))
# The labels are assigned by position, so the stacked frame must have exactly six rows.
# Every label is a raw string so the LaTeX backslashes are not read as escape sequences.
outcome_effects.index = [r'Emissions', r'Emissions$_{_{\mathrm{CSR\ policies}}}$', 
                         r'Clim.Trg.$_{_{\mathrm{Below\ 2^{\circ}}}}$', r'Clim.Trg.$_{_{\mathrm{Below\ 2^{\circ}, CSR\ policies}}}$',
                         r'Clim.Trg.$_{_{\mathrm{Well\ Below\ 2^{\circ}}}}$', r'Clim.Trg.$_{_{\mathrm{Well\ Below\ 2^{\circ}, CSR\ policies}}}$']
outcome_effects.index.name = 'factor'

#==== Figure 3
BL.plt.figure(figsize = (20, 8))
ax = BL.plt.subplot(121)
# Four rows are dropped from the explanator table before panel A is drawn.
ROB.plot_coefficients(relevance_effect_expl.drop(index = ["Length of text", 'Polysyllables', 'Positive sentiment', 'Dale-Chall index']),  ax, [-0.37,5.9,'A'])
ax = BL.plt.subplot(122)
ROB.plot_coefficients(relevance_effect_med, ax, [-0.16,10.2,'B'])
BL.plt.tight_layout()

#==== Figure 4
BL.plt.figure(figsize = (20, 6))
ax = BL.plt.subplot(121)
ROB.plot_FIGURE4PANELA(outcome_effects,  ax, [-0.22,-0.68,'A'])
ax = BL.plt.subplot(122)
# Pass the module-level ANNOTATOR so this call stays in step with the file names above.
tab = ROB.plot_time_stability(ax = ax, ANNOTATOR=ANNOTATOR)
BL.plt.tight_layout()



